# Exercise: scrape forum information - board, post title, author
import requests
import re
from lxml import etree


# Forum front page to scrape, and a desktop-browser User-Agent sent with the request.
URL = 'http://47.107.178.45/phpwind/'
header = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36'}

# Fetch the page. A timeout keeps the script from hanging forever on a stalled
# server, and raise_for_status() stops an HTTP error page from being parsed as
# if it were the forum.
_response = requests.get(url=URL, headers=header, timeout=10)
_response.raise_for_status()
# When the server declares no charset, requests decodes text/* as ISO-8859-1,
# which garbles non-Latin text; fall back to the encoding detected from the body.
if 'charset' not in _response.headers.get('content-type', '').lower():
    _response.encoding = _response.apparent_encoding
# Decoded HTML of the page as a str, consumed by the parsing step below.
res = _response.text


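# Approach 2: re.search takes the same arguments as re.findall. group() with no argument (or 0) returns the whole match; group(1), group(2), ... return the corresponding capture group.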
# title1 = re.search('<a(.+?)</a>',res.text).group()

# title2 = re.search('class="st">(.+?)</a>',res.content.decode('utf-8')).group()
# print(title2)

# print(title1,title2)

# Parse the downloaded page and print one "title board" line per entry.
HTML = etree.HTML(res)

# Read both links relative to the same <p class="title"> so a title is always
# paired with the board from its own row. Running two document-wide queries and
# zipping them misaligns every later pair as soon as one row lacks a link.
# Going by the variable names, a[3] is the post title and a[2] the board link.
titles = []
bk = []
for row in HTML.xpath('//p[@class="title"]'):
    board_links = row.xpath('a[2]')
    title_links = row.xpath('a[3]')
    if not (board_links and title_links):
        # Row does not have both links; skip it rather than shift the pairing.
        continue
    bk.append(board_links[0])
    titles.append(title_links[0])

for i, j in zip(titles, bk):
    # itertext() collects text inside nested tags too; .text alone is None
    # when the anchor begins with a child element.
    print(''.join(i.itertext()), ''.join(j.itertext()))


